library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(patchwork)
library(p8105.datasets)
data("weather_df")
Recreating scatterplot from viz1
# Baseline scatterplot: tmax against tmin, with points colored by `name` and
# drawn semi-transparent so overlapping observations stay visible.
ggplot(weather_df, aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = 0.5)
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).
Making labels
# Same scatterplot, now with a title, readable axis labels, a legend title
# for the color aesthetic and a caption.
weather_df |>
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maximum daily temperature (C)",
    color = "Location",
    caption = "Data from the rnoaa package"
  )
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).
Adding scales
# Labelled scatterplot with a customised x axis: ticks are placed only at
# -15, 0 and 15, and each tick gets a hand-written label.
weather_df |>
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maximum daily temperature (C)",
    color = "Location",
    caption = "Data from the rnoaa package"
  ) +
  scale_x_continuous(
    breaks = c(-15, 0, 15),
    # `labels` must line up one-to-one with `breaks`.
    labels = c("-15°C", "0", "15")
  )
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).
Even more scale options
# Labelled scatterplot with both axes customised:
#   * x axis: fixed breaks/labels and explicit limits of [-20, 30];
#   * y axis: square-root transformed and drawn on the right-hand side.
weather_df |>
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maximum daily temperature (C)",
    color = "Location",
    caption = "Data from the rnoaa package"
  ) +
  scale_x_continuous(
    breaks = c(-15, 0, 15),
    labels = c("-15°C", "0", "15"),
    limits = c(-20, 30)
  ) +
  # The sqrt transform is undefined for negative tmax values, so this step
  # produces NaNs and those points are dropped from the plot (with warnings).
  scale_y_continuous(
    trans = "sqrt",
    position = "right"
  )
## Warning in transformation$transform(x): NaNs produced
## Warning in scale_y_continuous(trans = "sqrt", position = "right"): sqrt
## transformation introduced infinite values.
## Warning: Removed 142 rows containing missing values or values outside the scale range
## (`geom_point()`).
Analogously to scale_x_* and scale_y_*, there are scales corresponding
to other aesthetics. Some of the most common are used to control the
color aesthetic. For example, arguments to scale_color_hue() control the
color scale and the name in the plot legend.
# Labelled scatterplot with a custom discrete color scale: `h` sets the range
# of hues that scale_color_hue() draws the per-location colors from.
weather_df |>
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maximum daily temperature (C)",
    color = "Location",
    caption = "Data from the rnoaa package"
  ) +
  scale_color_hue(h = c(100, 300))
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).
Building a new color scheme by hand is hard; it is easier to use a
predefined palette (here, viridis).
# Base temperature plot, saved so later chunks can add themes to it.
ggp_temp_plot =
  weather_df |>
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maximum daily temperature (C)",
    color = "Location",
    caption = "Data from the rnoaa package"
  ) +
  # We use discrete = TRUE because the color aesthetic is mapped to a discrete
  # variable. In other cases (for example, when color is mapped to prcp) you
  # can omit this argument to get a continuous color gradient. The
  # viridis::scale_fill_viridis() function is the counterpart for the fill
  # aesthetic used in histograms, density plots, and elsewhere.
  viridis::scale_color_viridis(
    name = "Location",
    discrete = TRUE
  )

# Print the stored plot.
ggp_temp_plot
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Themes
Updating base plot…
# Move the legend of the stored base plot underneath the panel.
ggp_temp_plot + theme(legend.position = "bottom")
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).
# One data frame per location, so each can be handed to a different geom.
central_park_df = filter(weather_df, name == "CentralPark_NY")
molokai_df = filter(weather_df, name == "Molokai_HI")

# Molokai is drawn as points from the plot-level data; the line layer swaps in
# the Central Park data while inheriting the same aesthetic mappings.
molokai_df |>
  ggplot(aes(x = date, y = tmax, color = name)) +
  geom_point() +
  geom_line(data = central_park_df)
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Patchwork

Sometimes, though, you want to show two or three
fundamentally different plots in the same graphic: you may want to
juxtapose a scatterplot and a boxplot, or show scatterplots illustrating
relationships between different variables. In this case, a solution is
to create each of the panels you want separately and combine panels
using tools in the patchwork package:
Make three plots and combine using patchwork
# Panel 1: tmax against tmin, colored by location.
ggp_tmax_tmin =
  ggplot(weather_df, aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = 0.5)

# Panel 2: density of precipitation, restricted to rows with prcp > 0.
ggp_prec_density =
  ggplot(filter(weather_df, prcp > 0), aes(x = prcp, fill = name)) +
  geom_density(alpha = 0.5)

# Panel 3: tmax over time with a smooth trend per location (no error band),
# legend placed below the panel.
ggp_temp_season =
  ggplot(weather_df, aes(x = date, y = tmax, color = name)) +
  geom_point(alpha = 0.5) +
  geom_smooth(se = FALSE) +
  theme(legend.position = "bottom")

# patchwork layout: panels 1 and 2 side by side, panel 3 underneath.
(ggp_tmax_tmin + ggp_prec_density) / ggp_temp_season
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).
Let’s make temperature violin plots.
# Violin plot of tmax per location, with the locations put in an explicit,
# hand-picked order: fct_relevel() moves the listed levels to the front in the
# order given.
weather_df |>
  mutate(
    name = fct_relevel(name, c("Molokai_HI", "CentralPark_NY", "Waterhole_WA"))
  ) |>
  ggplot(aes(x = name, y = tmax, fill = name)) +
  # `alpha` (previously misspelled and therefore ignored) makes the fill
  # semi-transparent.
  geom_violin(alpha = 0.5)
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
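The plot above used fct_relevel() to put the locations in a hand-picked order; the next one uses fct_reorder() to order them by their tmax values instead.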
# Violin plot of tmax per location, with the locations ordered by their tmax
# values rather than by hand. `.na_rm = TRUE` drops the missing tmax values
# silently; leaving it unset removes them anyway but raises a warning.
weather_df |>
  mutate(name = fct_reorder(name, tmax, .na_rm = TRUE)) |>
  ggplot(aes(x = name, y = tmax, fill = name)) +
  geom_violin(alpha = 0.5)
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
Tidying data before plotting
# Import the PULSE data and reshape it from wide to long.
#
# pivot_longer() (tidyr) takes the per-visit BDI score columns and stacks them
# into two new columns: `visit`, holding the original column names with the
# "bdi_score_" prefix stripped, and `bdi`, holding the corresponding values.
# fct_inorder() then turns `visit` into a factor whose levels follow the order
# in which the visits first appear.
pulse_df =
  haven::read_sas("data_import_examples/public_pulse_data.sas7bdat") |>
  janitor::clean_names() |>
  pivot_longer(
    cols = bdi_score_bl:bdi_score_12m,
    names_to = "visit",
    names_prefix = "bdi_score_",
    values_to = "bdi"
  ) |>
  mutate(visit = fct_inorder(visit))

# Boxplot of BDI score at each visit.
ggplot(pulse_df, aes(x = visit, y = bdi)) +
  geom_boxplot()
## Warning: Removed 879 rows containing non-finite outside the scale range
## (`stat_boxplot()`).